/** * Licensed to WibiData, Inc. under one or more contributor license * agreements. See the NOTICE file distributed with this work for * additional information regarding copyright ownership. WibiData, Inc. * licenses this file to you under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package org.kiji.maven.plugins.hbase; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintWriter; import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Set; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.maven.artifact.Artifact; import org.apache.maven.artifact.DependencyResolutionRequiredException; import org.apache.maven.plugin.AbstractMojo; import org.apache.maven.plugin.MojoExecutionException; import org.apache.maven.project.MavenProject; /** * A maven goal that starts a mini HBase cluster in a new daemon thread. * * <p>A new daemon thread is created that starts a mini HBase cluster. The main thread * blocks until the HBase cluster has full started. The daemon thread with the * in-process HBase cluster will continue to run in the background until stopped by the * 'stop' goal of the plugin.</p> * * <p>The configuration of the started mini HBase cluster will be written to a * hbase-site.xml file in the test classpath ('${basedir}/target/test-classes' by * default). The path to the generated configuration file may be customized with the * 'hbaseSiteFile' property</p> * * <p>A configuration index can be written by this goal. The configuration index is a file that * contains one line for each configuration file written by this goal, where the line contains the * path to the configuration file. By default, the goal does not write a configuration index. * Setting the property 'writeConfIndex' to true will cause a configuration index to be written. * By default, the configuration index will be written to * '${basedir}/target/test-classes/conf-index.conf'. The path to the generated configuration index * can be customized by setting the 'hbaseConfIndex' property.</p> * * @goal start * @phase pre-integration-test * @requiresDependencyResolution test */ public class StartMojo extends AbstractMojo { /** * If true, this goal should be a no-op. * * @parameter property="skip" default-value="false" */ private boolean mSkip; /** * If true, the Hadoop temporary directory (given by Hadoop configuration property hadoop.tmp * .dir) will be cleared before the cluster is started, then copied to the project's build * directory before the cluster is shutdown. * * @parameter property="saveHadoopTmpDir" expression="${save.hadoop.tmp}" default-value="false" * @required */ private boolean mSaveHadoopTmpDir; /** * The file that will store the configuration required to connect to the started mini HBase * cluster. This file will be generated by the goal. * * @parameter property="hbaseSiteFile" expression="${hbase.site.file}" default-value="${project.build.testOutputDirectory}/hbase-site.xml" * @required */ private File mHBaseSiteFile; /** * Whether two separate XML configuration files should be generated * (one for HDFS+MapReduce, and one for HBase). * * @parameter property="separateConfigurationFiles" expression="${separate.configuration.files}" default-value="true" */ private boolean mSeparateConfigurationFiles; /** * The file that will store the configuration required to connect to the started mini HDFS and * MapReduce clusters. This file will be generated by the goal. * * @parameter property="coreSiteFile" expression="${core.site.file}" default-value="${project.build.testOutputDirectory}/core-site.xml" * @required */ private File mCoreSiteFile; /** * If true, this goal should write an index file that provides the paths to the HBase * configuration files written by this goal. * * @parameter property="writeConfIndex" expression="${hbase.conf.index}" default-value="false" */ private boolean mWriteConfIndex; /** * The file that will store paths to the configuration files generated by the goal. This file * will be generated by the goal and will contain one line for each configuration file giving the * path to that configuration file. * * @parameter property="hbaseConfIndex" expression="${hbase.conf.index.file}" default-value="${project.build.testOutputDirectory}/conf-index.conf" */ private File mHBaseConfIndex; /** * If true, also start a mini MapReduce cluster. * * @parameter property="mapReduceEnabled" expression="${mapreduce.enabled}" default-value="false" */ private boolean mIsMapReduceEnabled; /** * Extra Hadoop configuration properties to use. * * @parameter property="hadoopConfiguration" */ private Properties mHadoopConfiguration; /** * A list of this plugin's dependency artifacts. * * @parameter default-value="${plugin.artifacts}" * @required * @readonly */ private List<Artifact> mPluginDependencyArtifacts; /** * The maven project this plugin is running within. * * @parameter default-value="${project}" * @required * @readonly */ private MavenProject mMavenProject; /** * Sets whether this goal should be a no-op. * * @param skip If true, this goal should do nothing. */ public void setSkip(boolean skip) { mSkip = skip; } /** * Sets whether the Hadoop temporary directory, given by hadoop.tmp.dir, should be cleared * before the cluster is started and copied to the project build directory before the cluster * is shutdown. * * @param saveTempDir If true, the directory will be copied to the project build directory * before the cluster is shutdown. */ public void setSaveHadoopTmpDir(boolean saveTempDir) { mSaveHadoopTmpDir = saveTempDir; } /** * Sets the file that we should write the HBase cluster configuration to. * * <p>Note: The property "hbaseSiteFile" defined in this mojo means this method must be * named setHbaseSiteFile instead of setHBaseSiteFile.</p> * * @param hbaseSiteFile The file we should write to. */ public void setHbaseSiteFile(File hbaseSiteFile) { mHBaseSiteFile = hbaseSiteFile; } /** * Sets whether separate core-site.xml and hbase-site.xml files * should be generated (otherwise they are combined into a single * hbase-site.xml file). * * @param separateConfigurationFiles Whether they should be separated. */ public void setSeparateConfigurationFiles(boolean separateConfigurationFiles) { mSeparateConfigurationFiles = separateConfigurationFiles; } /** * Sets the file that we should write the MapReduce/HDFS cluster configuration to. * * @param coreSiteFile The file we should write to. */ public void setCoreSiteFile(File coreSiteFile) { mCoreSiteFile = coreSiteFile; } /** * Sets whether this goal should write a configuration index file. * * @param writeConfIndex True if an index file should be written, false otherwise. */ public void setWriteConfIndex(boolean writeConfIndex) { mWriteConfIndex = writeConfIndex; } /** * Sets the file that the HBase configuration index should be written to. * * <p>Note: The property "hbaseConfIndex" defined in this mojo means this method should be named * setHbaseConfIndex.</p> * * @param hbaseConfIndex The file we should write to. */ public void setHbaseConfIndex(File hbaseConfIndex) { mHBaseConfIndex = hbaseConfIndex; } /** * Sets whether we should start a mini MapReduce cluster in addition to the HBase cluster. * * @param enabled Whether to start a mini MapReduce cluster. */ public void setMapReduceEnabled(boolean enabled) { mIsMapReduceEnabled = enabled; } /** * Sets Hadoop configuration properties. * * @param properties Hadoop configuration properties to use in the mini cluster. */ public void setHadoopConfiguration(Properties properties) { mHadoopConfiguration = properties; } /** * Starts a mini HBase cluster in a new thread. * * <p>This method is called by the maven plugin framework to run the goal.</p> * * @throws MojoExecutionException If there is a fatal error during this goal's execution. */ @Override public void execute() throws MojoExecutionException { if (mSkip) { getLog().info("Not starting an HBase cluster because skip=true."); return; } System.setProperty("java.class.path", getClassPath()); getLog().info("Set java.class.path to: " + System.getProperty("java.class.path")); // Set any extra hadoop options. Configuration conf = new Configuration(); if (null != mHadoopConfiguration) { for (Map.Entry<Object, Object> property : mHadoopConfiguration.entrySet()) { String confKey = property.getKey().toString(); String confValue = property.getValue().toString(); getLog().info("Setting hadoop conf property '" + confKey + "' to '" + confValue + "'"); conf.set(confKey, confValue); } } // If necessary, clear the Hadoop tmp dir. if (mSaveHadoopTmpDir) { removeHadoopTmpDir(conf); } // Start the cluster. try { MiniHBaseClusterSingleton.INSTANCE.startAndWaitUntilReady( getLog(), mIsMapReduceEnabled, conf); } catch (IOException e) { throw new MojoExecutionException("Unable to start HBase cluster.", e); } if (mSeparateConfigurationFiles) { // Write separate core-site and hbase-site files. writeHBaseSiteFile(conf); writeCoreSiteFile(conf); } else { // Combine the configs into a single hbase-site file. writeSiteFile(conf, mHBaseSiteFile); } // Write the configuration index. if (mWriteConfIndex) { writeConfigurationIndex(); } } /** * Deletes the directory given by hadoop.tmp.dir in the specified configuration. The * MapReduce cluster started by this plugin will store logs for job tasks in a job-specific * directory under hadoop.tmp.dir/userlogs. The * {@link org.apache.hadoop.hbase.HBaseTestingUtility} will delete log files on shutdown but * not the directory structure, making it hard to locate specific job logs after multiple runs. * Clearing hadoop.tmp.dir before the cluster starts again alleviates this problem. * * @param conf A Hadoop configuration used to determine the value of hadoop.tmp.dir. */ private void removeHadoopTmpDir(Configuration conf) { String hadoopTmpPath = conf.get("hadoop.tmp.dir"); File hadoopTmp = new File(hadoopTmpPath); if (hadoopTmp.exists()) { getLog().info("Deleting Hadoop tmp dir " + hadoopTmp.toString() + " because it already " + "exists."); try { FileUtils.deleteDirectory(hadoopTmp); getLog().info("Successfully deleted Hadoop tmp dir: " + hadoopTmp.toString()); } catch (IOException e) { getLog().warn("An existing Hadoop tmp dir could not be deleted.", e); } } } /** * Gets the runtime classpath required to run the mini clusters. * * <p>The maven classloading scheme is nonstandard. They only put the "classworlds" jar * on the classpath, and it takes care of ClassLoading the rest of the jars. This a * problem if we are going to start a mini MapReduce cluster. The TaskTracker will * start a child JVM with the same classpath as this process, and it won't have * configured the classworlds class loader. To work around this, we will put all of * our dependencies into the java.class.path system property, which will be read by * the TaskRunner's child JVM launcher to build the child JVM classpath.</p> * * <p>Note that when we say "all of our dependencies" we mean both the dependencies of * this plugin as well as the test classes and dependencies of the project that is * running the plugin. We need to include the latter on the classpath because tests are * still just .class files at integration-test-time. There will be no jars available * yet to put on the distributed cache via job.setJarByClass(). Hence, all of the * test-classes in the project running this plugin need to already be on the classpath * of the MapReduce cluster.<p> */ private String getClassPath() throws MojoExecutionException { // Maintain a set of classpath components added so we can de-dupe. Set<String> alreadyAddedComponents = new HashSet<String>(); // Use this to build up the classpath string. StringBuilder classpath = new StringBuilder(); // Add the existing classpath. String existingClasspath = System.getProperty("java.class.path"); classpath.append(existingClasspath); alreadyAddedComponents.addAll(Arrays.asList(existingClasspath.split(":"))); // Add the test classes and dependencies of the maven project running this plugin. // // Note: It is important that we add these classes and dependencies before we add this // plugin's dependencies in case the maven project needs to override a jar version. List<?> testClasspathComponents; try { testClasspathComponents = mMavenProject.getTestClasspathElements(); } catch (DependencyResolutionRequiredException e) { throw new MojoExecutionException("Unable to retrieve project test classpath", e); } for (Object testClasspathComponent : testClasspathComponents) { String dependency = testClasspathComponent.toString(); if (alreadyAddedComponents.contains(dependency)) { continue; } classpath.append(":"); classpath.append(dependency); alreadyAddedComponents.add(dependency); } // Add this plugin's dependencies. for (Artifact artifact : mPluginDependencyArtifacts) { String dependency = artifact.getFile().getPath(); if (alreadyAddedComponents.contains(dependency)) { continue; } classpath.append(":"); classpath.append(dependency); alreadyAddedComponents.add(dependency); } return classpath.toString(); } /** * Writes the HBase-specific contents of the specified configuration to the HBase site file. * * @param conf The configuration to write. * @throws MojoExecutionException If there is an error writing the file. */ private void writeHBaseSiteFile(Configuration conf) throws MojoExecutionException { writeSiteFile(getHBaseOnlyConfiguration(conf), mHBaseSiteFile); } /** * Writes the MapReduce/HDFS-specific contents of the specified configuration to the core * site file. * * @param conf The configuration to write. * @throws MojoExecutionException If there is an error writing the file. */ private void writeCoreSiteFile(Configuration conf) throws MojoExecutionException { writeSiteFile(getMapReduceOnlyConfiguration(conf), mCoreSiteFile); } /** * Writes the specified configuration to the specified file. * * @param conf The configuration to write. * @param siteFile The file to write the configuration to. * @throws MojoExecutionException If there is an error writing the file. */ private void writeSiteFile(Configuration conf, File siteFile) throws MojoExecutionException { // Create the parent directory for the site file if it does not already exist. createFileParentDir(siteFile); // Write the file. FileOutputStream fileOutputStream = null; try { fileOutputStream = new FileOutputStream(siteFile); conf.writeXml(fileOutputStream); } catch (IOException e) { throw new MojoExecutionException( "Unable to write to site file: " + siteFile.getPath(), e); } finally { closeFileOutputStream(fileOutputStream); } getLog().info("Wrote " + siteFile.getPath() + "."); } /** * Gets a new configuration created from the specified configuration, including only HBase * configuration variables. * * @param conf The configuration to filter. * @return A new configuration containing copies of the appropriate configuration variables. */ private Configuration getHBaseOnlyConfiguration(Configuration conf) { return getFilteredConfiguration(conf, true); } /** * Gets a new configuration created from the specified configuration, including only * MapReduce/HDFS configuration variables. * * @param conf The configuration to filter. * @return A new configuration containing copies of the appropriate configuration variables. */ private Configuration getMapReduceOnlyConfiguration(Configuration conf) { return getFilteredConfiguration(conf, false); } /** * Gets a new configuration created from the specified configuration, * including only MapReduce/HDFS configuration variables or HBase only configuration variables. * * @param conf The configuration to filter. * @param hBaseOnly <code>true</code> if only HBase configuration variables should be included, * <code>false</code> if only MapReduce/HDFS configuration variables should be included. * @return A new configuration with copies of the appropriate configuration variables. */ private Configuration getFilteredConfiguration(Configuration conf, boolean hBaseOnly) { Configuration filteredConf = new Configuration(false); for (Map.Entry<String, String> entry: conf) { boolean startsWithHBase = entry.getKey().startsWith("hbase"); if ((startsWithHBase && hBaseOnly) || (!startsWithHBase && !hBaseOnly)) { filteredConf.set(entry.getKey(), entry.getValue()); } } return filteredConf; } /** * Writes a configuration index. * * @throws MojoExecutionException If there is an error writing the configuration file. */ private void writeConfigurationIndex() throws MojoExecutionException { // Create the parent directory of the file we are writing. createFileParentDir(mHBaseConfIndex); // Write the file. FileOutputStream fileOutputStream = null; PrintWriter fileWriter = null; try { fileOutputStream = new FileOutputStream(mHBaseConfIndex); fileWriter = new PrintWriter(fileOutputStream); fileWriter.println(mHBaseSiteFile.getPath()); } catch (IOException e) { throw new MojoExecutionException( "Unable to write to configuration index file: " + mHBaseConfIndex.getPath(), e); } finally { if (null != fileWriter) { fileWriter.close(); } closeFileOutputStream(fileOutputStream); } getLog().info("Wrote " + mHBaseConfIndex.getPath() + "."); } /** * Gets the parent directory of the specified file. Creates the directory if it does not already * exist. * * @return The parent directory. * @throws MojoExecutionException If there is an error getting or creating the parent directory. */ private static File createFileParentDir(File file) throws MojoExecutionException { File parentDir = file.getParentFile(); if (null != parentDir && !parentDir.exists() && !parentDir.mkdirs()) { throw new MojoExecutionException( "Unable to create or access parent directory of: " + file.getParent()); } return parentDir; } /** * Closes the specified FileOutputStream. The specified stream may be null, in which case this * operation is a no-op. * * @throws MojoExecutionException If there is an error closing the stream. */ private static void closeFileOutputStream(FileOutputStream stream) throws MojoExecutionException { if (null != stream) { try { stream.close(); } catch (IOException e) { throw new MojoExecutionException ("Unable to close file stream.", e); } } } }